Dakshay Ahuja - 2010990178

Part 1- Supervised Learning¶

Q1)EDA¶

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from imblearn.over_sampling import SMOTE
from sklearn.metrics import accuracy_score, mean_squared_error, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from warnings import filterwarnings
filterwarnings("ignore")
In [2]:
bank_data = pd.read_csv("Datasets/bank.csv", sep=";")
bank_data.head()
Out[2]:
age job marital education default housing loan contact month day_of_week ... campaign pdays previous poutcome emp.var.rate cons.price.idx cons.conf.idx euribor3m nr.employed y
0 56 housemaid married basic.4y no no no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
1 57 services married high.school unknown no no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
2 37 services married high.school no yes no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
3 40 admin. married basic.6y no no no telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no
4 56 services married high.school no no yes telephone may mon ... 1 999 0 nonexistent 1.1 93.994 -36.4 4.857 5191.0 no

5 rows × 21 columns

In [3]:
bank_data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41188 entries, 0 to 41187
Data columns (total 21 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   age             41188 non-null  int64  
 1   job             41188 non-null  object 
 2   marital         41188 non-null  object 
 3   education       41188 non-null  object 
 4   default         41188 non-null  object 
 5   housing         41188 non-null  object 
 6   loan            41188 non-null  object 
 7   contact         41188 non-null  object 
 8   month           41188 non-null  object 
 9   day_of_week     41188 non-null  object 
 10  duration        41188 non-null  int64  
 11  campaign        41188 non-null  int64  
 12  pdays           41188 non-null  int64  
 13  previous        41188 non-null  int64  
 14  poutcome        41188 non-null  object 
 15  emp.var.rate    41188 non-null  float64
 16  cons.price.idx  41188 non-null  float64
 17  cons.conf.idx   41188 non-null  float64
 18  euribor3m       41188 non-null  float64
 19  nr.employed     41188 non-null  float64
 20  y               41188 non-null  object 
dtypes: float64(5), int64(5), object(11)
memory usage: 6.6+ MB
In [4]:
bank_data.describe()
Out[4]:
age duration campaign pdays previous emp.var.rate cons.price.idx cons.conf.idx euribor3m nr.employed
count 41188.00000 41188.000000 41188.000000 41188.000000 41188.000000 41188.000000 41188.000000 41188.000000 41188.000000 41188.000000
mean 40.02406 258.285010 2.567593 962.475454 0.172963 0.081886 93.575664 -40.502600 3.621291 5167.035911
std 10.42125 259.279249 2.770014 186.910907 0.494901 1.570960 0.578840 4.628198 1.734447 72.251528
min 17.00000 0.000000 1.000000 0.000000 0.000000 -3.400000 92.201000 -50.800000 0.634000 4963.600000
25% 32.00000 102.000000 1.000000 999.000000 0.000000 -1.800000 93.075000 -42.700000 1.344000 5099.100000
50% 38.00000 180.000000 2.000000 999.000000 0.000000 1.100000 93.749000 -41.800000 4.857000 5191.000000
75% 47.00000 319.000000 3.000000 999.000000 0.000000 1.400000 93.994000 -36.400000 4.961000 5228.100000
max 98.00000 4918.000000 56.000000 999.000000 7.000000 1.400000 94.767000 -26.900000 5.045000 5228.100000
In [5]:
bank_data.isna().sum().any()
Out[5]:
False
In [6]:
plt.figure(figsize=(10,7))
sns.heatmap(bank_data.select_dtypes(exclude=['object']).corr(), linewidths=0.2, annot=True, fmt='.2f', cmap='flare')
plt.title("Correlation Matrix", fontsize=20)
plt.show()
No description has been provided for this image

Highest Correlation is in betweeen euribor 3 month rate and employment variation rate

In [7]:
plt.figure(figsize=(10,5))
plt.xticks(rotation = 45)
job_count = bank_data['job'].value_counts()
sns.barplot(data=bank_data, x=job_count.index, y=job_count.values)
plt.title("Distribution of Jobs", fontsize=18)
plt.xlabel("Job")
plt.ylabel("Count")
plt.show()
No description has been provided for this image

We can observe that admin and bluecollar are the most popular jobs.

Q2) Pre-Processing¶

Missing Value Analysis¶

In [8]:
bank_data.isna().sum()
Out[8]:
age               0
job               0
marital           0
education         0
default           0
housing           0
loan              0
contact           0
month             0
day_of_week       0
duration          0
campaign          0
pdays             0
previous          0
poutcome          0
emp.var.rate      0
cons.price.idx    0
cons.conf.idx     0
euribor3m         0
nr.employed       0
y                 0
dtype: int64

Label Encoding¶

In [9]:
le = LabelEncoder()
for col in bank_data.select_dtypes(include=['object']).columns:
 bank_data[col] = le.fit_transform(bank_data[col])
 
bank_data.head()
Out[9]:
age job marital education default housing loan contact month day_of_week ... campaign pdays previous poutcome emp.var.rate cons.price.idx cons.conf.idx euribor3m nr.employed y
0 56 3 1 0 0 0 0 1 6 1 ... 1 999 0 1 1.1 93.994 -36.4 4.857 5191.0 0
1 57 7 1 3 1 0 0 1 6 1 ... 1 999 0 1 1.1 93.994 -36.4 4.857 5191.0 0
2 37 7 1 3 0 2 0 1 6 1 ... 1 999 0 1 1.1 93.994 -36.4 4.857 5191.0 0
3 40 0 1 1 0 0 0 1 6 1 ... 1 999 0 1 1.1 93.994 -36.4 4.857 5191.0 0
4 56 7 1 3 0 0 2 1 6 1 ... 1 999 0 1 1.1 93.994 -36.4 4.857 5191.0 0

5 rows × 21 columns

In [10]:
x = bank_data.iloc[:,:-1]
y = bank_data.iloc[:,-1]

Selecting important features based on Random Forest¶

In [11]:
regr = RandomForestClassifier(random_state=0)
regr.fit(x,y)
Out[11]:
RandomForestClassifier(random_state=0)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
RandomForestClassifier(random_state=0)
In [12]:
print(regr.feature_importances_)
sorted_idx=regr.feature_importances_.argsort()[::-1]
plt.figure(figsize=(10,5))
y_values = list(x.columns[sorted_idx])
sns.barplot(x=regr.feature_importances_[sorted_idx], y=y_values, palette="Spectral")
plt.title('Feature Importances')
plt.xlabel('Importance')
plt.ylabel('Features')
plt.show()
[0.09292229 0.04853943 0.02430097 0.04397167 0.0089484  0.01974093
 0.01461915 0.00961456 0.01780743 0.04039627 0.32150086 0.04266178
 0.03105977 0.01220809 0.02415972 0.01619528 0.02050237 0.02346571
 0.12332706 0.06405828]
No description has been provided for this image

Handling unbalanced data using SMOTE¶

In [13]:
sm = SMOTE(random_state=0)
x,y = sm.fit_resample(x,y)
In [14]:
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=0)

Standardizing the data¶

In [15]:
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

Q3,4) Building models and tabulating performance metrics¶

Logistic Regression¶

In [16]:
classifier = LogisticRegression(random_state = 0, max_iter=10000)
classifier.fit(x_train, y_train)
Out[16]:
LogisticRegression(max_iter=10000, random_state=0)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
LogisticRegression(max_iter=10000, random_state=0)
In [17]:
y_pred = classifier.predict(x_test)
print(confusion_matrix(y_test, y_pred))
[[6336  866]
 [ 793 6625]]
In [18]:
acc = accuracy_score(y_test, y_pred) * 100
print("Accuracy score is {}%".format(round(acc,2)))
Accuracy score is 88.65%
In [19]:
cv_acc = np.mean(cross_val_score(LogisticRegression(), x, y, cv=10)) * 100
In [20]:
print("Cross Validation Score is {}%".format(round(cv_acc,2)))
Cross Validation Score is 83.16%

Decision Tree¶

In [21]:
dtree = DecisionTreeClassifier(random_state=20)
dtree.fit(x_train,y_train)
Out[21]:
DecisionTreeClassifier(random_state=20)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
DecisionTreeClassifier(random_state=20)
In [22]:
y_pred = dtree.predict(x_test)
cm = confusion_matrix(y_test,y_pred)
print(cm)
[[6637  565]
 [ 517 6901]]
In [23]:
acc = accuracy_score(y_test, y_pred) * 100
print("Accuracy: {}%".format(round(acc,2)))
Accuracy: 92.6%

Random Forest¶

In [24]:
regr = RandomForestClassifier(max_features=7, max_depth=8, n_jobs=-1)
regr.fit(x_train, y_train)
y_pred = regr.predict(x_test)
In [25]:
acc = accuracy_score(y_test, y_pred) * 100
print("Accuracy is {}%".format(round(acc,2)))
print("Mean Squared Error is {}%".format(round(mean_squared_error(y_test, y_pred),2)))
Accuracy is 91.98%
Mean Squared Error is 0.08%

Performance Metrics¶

Model Accuracy
Logistic Regression 83.16%
Decision Tree 92.52%
Random Forest 92.02%

From above table we can see that Decision Tree and Random Forest have almost same accuracy.
Keeping the model complexity in mind, we should choose Decision Tree.

Part 2- Unsupervised Learning¶

Q1,2)EDA¶

In [26]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from warnings import filterwarnings
filterwarnings("ignore")
In [27]:
data = pd.read_csv("Datasets/credit_card.csv")
data = data.iloc[:,1:]
data
Out[27]:
BALANCE BALANCE_FREQUENCY PURCHASES ONEOFF_PURCHASES INSTALLMENTS_PURCHASES CASH_ADVANCE PURCHASES_FREQUENCY ONEOFF_PURCHASES_FREQUENCY PURCHASES_INSTALLMENTS_FREQUENCY CASH_ADVANCE_FREQUENCY CASH_ADVANCE_TRX PURCHASES_TRX CREDIT_LIMIT PAYMENTS MINIMUM_PAYMENTS PRC_FULL_PAYMENT TENURE
0 40.900749 0.818182 95.40 0.00 95.40 0.000000 0.166667 0.000000 0.083333 0.000000 0 2 1000.0 201.802084 139.509787 0.000000 12
1 3202.467416 0.909091 0.00 0.00 0.00 6442.945483 0.000000 0.000000 0.000000 0.250000 4 0 7000.0 4103.032597 1072.340217 0.222222 12
2 2495.148862 1.000000 773.17 773.17 0.00 0.000000 1.000000 1.000000 0.000000 0.000000 0 12 7500.0 622.066742 627.284787 0.000000 12
3 1666.670542 0.636364 1499.00 1499.00 0.00 205.788017 0.083333 0.083333 0.000000 0.083333 1 1 7500.0 0.000000 NaN 0.000000 12
4 817.714335 1.000000 16.00 16.00 0.00 0.000000 0.083333 0.083333 0.000000 0.000000 0 1 1200.0 678.334763 244.791237 0.000000 12
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
8945 28.493517 1.000000 291.12 0.00 291.12 0.000000 1.000000 0.000000 0.833333 0.000000 0 6 1000.0 325.594462 48.886365 0.500000 6
8946 19.183215 1.000000 300.00 0.00 300.00 0.000000 1.000000 0.000000 0.833333 0.000000 0 6 1000.0 275.861322 NaN 0.000000 6
8947 23.398673 0.833333 144.40 0.00 144.40 0.000000 0.833333 0.000000 0.666667 0.000000 0 5 1000.0 81.270775 82.418369 0.250000 6
8948 13.457564 0.833333 0.00 0.00 0.00 36.558778 0.000000 0.000000 0.000000 0.166667 2 0 500.0 52.549959 55.755628 0.250000 6
8949 372.708075 0.666667 1093.25 1093.25 0.00 127.040008 0.666667 0.666667 0.000000 0.333333 2 23 1200.0 63.165404 88.288956 0.000000 6

8950 rows × 17 columns

In [28]:
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8950 entries, 0 to 8949
Data columns (total 17 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   BALANCE                           8950 non-null   float64
 1   BALANCE_FREQUENCY                 8950 non-null   float64
 2   PURCHASES                         8950 non-null   float64
 3   ONEOFF_PURCHASES                  8950 non-null   float64
 4   INSTALLMENTS_PURCHASES            8950 non-null   float64
 5   CASH_ADVANCE                      8950 non-null   float64
 6   PURCHASES_FREQUENCY               8950 non-null   float64
 7   ONEOFF_PURCHASES_FREQUENCY        8950 non-null   float64
 8   PURCHASES_INSTALLMENTS_FREQUENCY  8950 non-null   float64
 9   CASH_ADVANCE_FREQUENCY            8950 non-null   float64
 10  CASH_ADVANCE_TRX                  8950 non-null   int64  
 11  PURCHASES_TRX                     8950 non-null   int64  
 12  CREDIT_LIMIT                      8949 non-null   float64
 13  PAYMENTS                          8950 non-null   float64
 14  MINIMUM_PAYMENTS                  8637 non-null   float64
 15  PRC_FULL_PAYMENT                  8950 non-null   float64
 16  TENURE                            8950 non-null   int64  
dtypes: float64(14), int64(3)
memory usage: 1.2 MB
In [29]:
data.describe()
Out[29]:
BALANCE BALANCE_FREQUENCY PURCHASES ONEOFF_PURCHASES INSTALLMENTS_PURCHASES CASH_ADVANCE PURCHASES_FREQUENCY ONEOFF_PURCHASES_FREQUENCY PURCHASES_INSTALLMENTS_FREQUENCY CASH_ADVANCE_FREQUENCY CASH_ADVANCE_TRX PURCHASES_TRX CREDIT_LIMIT PAYMENTS MINIMUM_PAYMENTS PRC_FULL_PAYMENT TENURE
count 8950.000000 8950.000000 8950.000000 8950.000000 8950.000000 8950.000000 8950.000000 8950.000000 8950.000000 8950.000000 8950.000000 8950.000000 8949.000000 8950.000000 8637.000000 8950.000000 8950.000000
mean 1564.474828 0.877271 1003.204834 592.437371 411.067645 978.871112 0.490351 0.202458 0.364437 0.135144 3.248827 14.709832 4494.449450 1733.143852 864.206542 0.153715 11.517318
std 2081.531879 0.236904 2136.634782 1659.887917 904.338115 2097.163877 0.401371 0.298336 0.397448 0.200121 6.824647 24.857649 3638.815725 2895.063757 2372.446607 0.292499 1.338331
min 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 0.000000 50.000000 0.000000 0.019163 0.000000 6.000000
25% 128.281915 0.888889 39.635000 0.000000 0.000000 0.000000 0.083333 0.000000 0.000000 0.000000 0.000000 1.000000 1600.000000 383.276166 169.123707 0.000000 12.000000
50% 873.385231 1.000000 361.280000 38.000000 89.000000 0.000000 0.500000 0.083333 0.166667 0.000000 0.000000 7.000000 3000.000000 856.901546 312.343947 0.000000 12.000000
75% 2054.140036 1.000000 1110.130000 577.405000 468.637500 1113.821139 0.916667 0.300000 0.750000 0.222222 4.000000 17.000000 6500.000000 1901.134317 825.485459 0.142857 12.000000
max 19043.138560 1.000000 49039.570000 40761.250000 22500.000000 47137.211760 1.000000 1.000000 1.000000 1.500000 123.000000 358.000000 30000.000000 50721.483360 76406.207520 1.000000 12.000000

Missing Value Analysis¶

In [30]:
data.isna().sum().any()
Out[30]:
True
In [31]:
sns.pairplot(data=data)
plt.show()
No description has been provided for this image
In [32]:
plt.figure(figsize=(22,16))
sns.heatmap(data.corr(), annot=True, fmt='0.2f', linewidths=0.5, cmap='mako')
plt.title("Heatmap of Numerical Features", fontsize=30)
plt.show()
No description has been provided for this image
In [33]:
data.isna().sum()
Out[33]:
BALANCE                               0
BALANCE_FREQUENCY                     0
PURCHASES                             0
ONEOFF_PURCHASES                      0
INSTALLMENTS_PURCHASES                0
CASH_ADVANCE                          0
PURCHASES_FREQUENCY                   0
ONEOFF_PURCHASES_FREQUENCY            0
PURCHASES_INSTALLMENTS_FREQUENCY      0
CASH_ADVANCE_FREQUENCY                0
CASH_ADVANCE_TRX                      0
PURCHASES_TRX                         0
CREDIT_LIMIT                          1
PAYMENTS                              0
MINIMUM_PAYMENTS                    313
PRC_FULL_PAYMENT                      0
TENURE                                0
dtype: int64
In [34]:
data['MINIMUM_PAYMENTS'] = data['MINIMUM_PAYMENTS'].fillna(data['MINIMUM_PAYMENTS'].mean())
data = data.dropna()
In [35]:
data.isna().sum().any()
Out[35]:
False

Outlier Treatment using Z-Score¶

In [36]:
from scipy.stats import zscore
card_data =  data[(np.abs(zscore(data)) < 3).all(axis=1)]
card_data
Out[36]:
BALANCE BALANCE_FREQUENCY PURCHASES ONEOFF_PURCHASES INSTALLMENTS_PURCHASES CASH_ADVANCE PURCHASES_FREQUENCY ONEOFF_PURCHASES_FREQUENCY PURCHASES_INSTALLMENTS_FREQUENCY CASH_ADVANCE_FREQUENCY CASH_ADVANCE_TRX PURCHASES_TRX CREDIT_LIMIT PAYMENTS MINIMUM_PAYMENTS PRC_FULL_PAYMENT TENURE
0 40.900749 0.818182 95.40 0.00 95.40 0.000000 0.166667 0.000000 0.083333 0.000000 0 2 1000.0 201.802084 139.509787 0.000000 12
1 3202.467416 0.909091 0.00 0.00 0.00 6442.945483 0.000000 0.000000 0.000000 0.250000 4 0 7000.0 4103.032597 1072.340217 0.222222 12
2 2495.148862 1.000000 773.17 773.17 0.00 0.000000 1.000000 1.000000 0.000000 0.000000 0 12 7500.0 622.066742 627.284787 0.000000 12
3 1666.670542 0.636364 1499.00 1499.00 0.00 205.788017 0.083333 0.083333 0.000000 0.083333 1 1 7500.0 0.000000 864.206542 0.000000 12
4 817.714335 1.000000 16.00 16.00 0.00 0.000000 0.083333 0.083333 0.000000 0.000000 0 1 1200.0 678.334763 244.791237 0.000000 12
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
8876 121.294493 0.750000 385.26 385.26 0.00 169.554775 0.500000 0.500000 0.000000 0.125000 3 7 500.0 98.225317 113.944625 0.000000 8
8877 55.147722 0.875000 163.31 0.00 163.31 0.000000 0.875000 0.000000 0.750000 0.000000 0 7 1000.0 62.619008 109.580539 0.000000 8
8878 869.328293 0.750000 0.00 0.00 0.00 1153.925205 0.000000 0.000000 0.000000 0.125000 3 0 1200.0 150.755024 337.986933 0.000000 8
8879 137.744895 1.000000 441.00 0.00 441.00 0.000000 0.750000 0.000000 0.625000 0.000000 0 6 1000.0 169.249683 124.639905 0.000000 8
8880 331.070135 0.750000 81.20 81.20 0.00 419.341394 0.125000 0.125000 0.000000 0.250000 7 1 500.0 116.761487 205.065526 0.000000 8

7434 rows × 17 columns

Deal with correlated variables¶

In [37]:
plt.figure(figsize=(16,12))
sns.heatmap(card_data.corr(), annot=True, linewidths=0.5, fmt='0.2f')
Out[37]:
<Axes: >
No description has been provided for this image

We can see high correlation between some columns. Performing PCA will take care of it

Q3) PCA¶

In [38]:
sc = StandardScaler()
card_data = sc.fit_transform(card_data)
In [39]:
from sklearn.decomposition import PCA
pca = PCA()
PCA_result = pca.fit_transform(card_data)
np.cumsum(pca.explained_variance_ratio_)
Out[39]:
array([0.28968147, 0.49329906, 0.58861536, 0.66564893, 0.7288119 ,
       0.78268169, 0.82938344, 0.86752425, 0.90208756, 0.93039185,
       0.94850925, 0.96579099, 0.97993371, 0.99137675, 0.99772969,
       0.99999802, 1.        ])

We need 12 Principal Components to explain 95% variance

In [40]:
pca_model = PCA(n_components=12, random_state=0)
final_data = pd.DataFrame(pca_model.fit_transform(card_data))
final_data.head()
Out[40]:
0 1 2 3 4 5 6 7 8 9 10 11
0 -1.325625 -2.046626 0.204168 -0.629249 0.063922 -0.321145 0.643574 -0.205845 -0.274637 -0.125446 0.039036 -0.281492
1 -2.781074 3.311279 0.444409 1.504716 -1.857740 0.175910 -0.372141 0.449321 -1.045414 1.490024 1.296720 -1.884472
2 1.237429 0.544760 1.472225 -2.058193 0.490464 -0.017539 -1.688912 -0.290765 0.758095 0.996202 -0.779984 -0.451643
3 -0.606674 -0.007237 1.815941 -1.006197 -0.773933 0.843505 0.180941 -0.717758 0.660198 -1.274210 0.792208 0.590334
4 -1.448065 -1.469047 0.273819 -1.139256 0.214302 -0.436232 0.309731 0.229897 -0.805582 -0.032215 -0.095327 -0.363798
In [41]:
from sklearn.cluster import KMeans
wcss = []
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state=42)
    kmeans.fit(final_data)
    wcss.append(kmeans.inertia_)
plt.plot(range(1, 11), wcss, 'rx-')
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.show()
No description has been provided for this image
In [42]:
from sklearn.metrics import silhouette_score
sil_scores = []
for i in range(2, 11):
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state=42)
    kmeans.fit(final_data)
    sil_scores.append(silhouette_score(final_data, kmeans.labels_))
plt.plot(range(2, 11), sil_scores, 'bx-')
plt.title('Silhouette Analysis For Optimal k')
plt.xlabel('Number of clusters')
plt.ylabel('Silhouette Score')
plt.show()
No description has been provided for this image
In [43]:
kmeans = KMeans(n_clusters=3, random_state=42)
clusters = kmeans.fit_predict(final_data)
In [44]:
final_data['Cluster'] = clusters
In [45]:
final_data['Cluster'].value_counts()
Out[45]:
Cluster
1    4177
2    1791
0    1466
Name: count, dtype: int64

We get 3 clusters.